#!/usr/bin/perl -w

use strict;     # warnings are already enabled by -w on the #! line
use XML::Parser;

# State shared by the sx* handlers while a document is being parsed.
my $sxout;      # root node of the tree built by the current/most recent parse
my @estack;     # nodes of the elements that are currently open, innermost last
my $epreserve;  # number of open elements carrying xml:space="preserve"

# A single parser object, reused for every parse; its handlers assemble the
# tree in $sxout.
my $p = XML::Parser->new(
  Handlers => {
    Init  => \&sxInit,
    Final => \&sxFinal,
    Start => \&sxStart,
    End   => \&sxEnd,
    Char  => \&sxChar,
  },
  ProtocolEncoding => 'US-ASCII',
  Stream_Delimiter => '>&&<',
);

use Data::Dumper;
use Benchmark;
# Time 500 complete parses (each run overwrites $sx with its tree), then
# dump the tree produced by the last run.
my $sx;
timethis(500, sub { $sx = xml2sx(); });
print Dumper($sx);

###

# Slot indices inside an sx node, which is an array ref of the form
#   [ element name, { attributes }, content item, content item, ... ]
# The empty prototype lets perl treat each of these as a constant.
sub SX_ELEM () { 0 }    # element name
sub SX_ATTR () { 1 }    # hash ref of attributes
sub SX_CONT () { 2 }    # first content item (text string or child node)

# xml2sx([$file]) -- parse an XML file and return its sx tree.
#
#   $file   optional path of the document to parse. When omitted or undef,
#           the previously hard-coded input ':parsetest.xml' is used, so
#           existing argument-less calls behave as before.
#
# Returns the root node left in $sxout by the parse. Nothing is caught
# here: anything the parser or a handler dies with reaches the caller.
sub xml2sx {
  my ($file) = @_;
  $file = ':parsetest.xml' unless defined $file;
  $p->parsefile($file);
  return $sxout;
}

# Init handler: return all shared builder state to a clean slate.
#
# Receives the Expat object as its only argument; it is not needed here.
#
# sxFinal also empties @estack, but XML::Parser documents the Final handler
# as running only after an error-free parse. Without the reset below, a
# parse that died part-way would leave its open elements on the stack and
# the next parse would attach its root element to one of them.
sub sxInit {
  $sxout     = undef;
  @estack    = ();
  $epreserve = 0;
}

# Final handler: drop whatever is left on the open-element stack once the
# parse is over. The finished tree stays available in $sxout.
sub sxFinal {
  my $exp = shift;    # Expat object; not used
  @estack = ();
}

# Start handler: build the node for a newly opened element and link it in.
#
#   $exp    Expat object (unused)
#   $elem   element name
#   %attr   the element's attributes as name => value pairs
#
# The node starts out as [ name, { attributes } ]; content is appended to
# it by later handler calls while it sits on top of @estack.
sub sxStart {
  my ($exp, $elem, %attr) = @_;
  my $node = [$elem, {%attr}];

  if (!@estack) {
    # No element is open, so this is the document root -- only one allowed.
    die "xpe[second root element encountered]" if $sxout;
    $sxout = $node;
  } else {
    # Otherwise it is the next content item of the innermost open element.
    push @{$estack[-1]}, $node;
  }
  push @estack, $node;

  # Track how many open elements ask for whitespace to be preserved.
  my $space = $attr{'xml:space'} || '';
  $epreserve++ if $space eq 'preserve';
}

# End handler: close the innermost open element and tidy up its content.
#
#   $exp    Expat object (unused)
#   $elem   name from the end tag
#
# Steps, applied to the node popped from @estack:
#   1. check that the end tag matches the node's start tag (dies otherwise);
#   2. join every run of adjacent text items into a single string;
#   3. unless an xml:space="preserve" element is open (this one included),
#      collapse each run of CR/LF/TAB/space in the text items to one space
#      and trim the spaces at the very start and very end of the content;
#   4. undo this element's contribution to $epreserve, if it made one.
# The node is modified in place, since its parent already refers to it.
sub sxEnd {
  my ($exp, $elem) = @_;
  my $sx   = pop @estack;
  my $name = $sx->[SX_ELEM];
  $name eq $elem or die "xpe[end tag '$elem' doesn't match start tag '$name']";

  # Rebuild the content list in one pass, appending each text item to the
  # previous one when that is text too. This is linear in the number of
  # items, unlike splicing the node's array once per merged pair.
  my $first = SX_CONT;
  my @content;
  for my $item (@$sx[$first .. $#$sx]) {
    if (@content and !ref $item and !ref $content[-1]) {
      $content[-1] .= $item;
    } else {
      push @content, $item;
    }
  }

  if (!$epreserve) {
    # Only content is normalised; the name and attribute slots are left alone.
    for my $item (@content) {
      next if ref $item;
      $item =~ s/[\cM\cJ\cI ]+/ /g;
    }
    if (@content) {
      $content[0]  =~ s/^ +// unless ref $content[0];
      $content[-1] =~ s/ +$// unless ref $content[-1];
    }
  }

  # Put the tidied content back in place of the old content items.
  splice @$sx, $first, @$sx - $first, @content;

  # Decrement only after the check above, so an element that itself asks
  # for preservation keeps its own text untouched.
  $epreserve-- if ($sx->[SX_ATTR]{'xml:space'} || '') eq 'preserve';
}

# Char handler: append a piece of character data to the innermost open
# element's content.
#
#   $exp    Expat object (unused)
#   $text   the text to append
#
# Dies if no element is open. The text is stored as delivered; joining
# adjacent pieces and whitespace handling happen when the element ends.
sub sxChar {
  my ($exp, $text) = @_;
  @estack or die "xpe[text encountered outside of any element]";
  push @{$estack[-1]}, $text;
}